1
|
|
|
/* |
2
|
|
|
* Copyright (c) 2018 Rafael da Silva Rocha. |
3
|
|
|
* |
4
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining |
5
|
|
|
* a copy of this software and associated documentation files (the |
6
|
|
|
* "Software"), to deal in the Software without restriction, including |
7
|
|
|
* without limitation the rights to use, copy, modify, merge, publish, |
8
|
|
|
* distribute, sublicense, and/or sell copies of the Software, and to |
9
|
|
|
* permit persons to whom the Software is furnished to do so, subject to |
10
|
|
|
* the following conditions: |
11
|
|
|
* |
12
|
|
|
* The above copyright notice and this permission notice shall be |
13
|
|
|
* included in all copies or substantial portions of the Software. |
14
|
|
|
* |
15
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
16
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
17
|
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
18
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
19
|
|
|
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
20
|
|
|
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
21
|
|
|
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
22
|
|
|
* |
23
|
|
|
*/ |
24
|
|
|
|
25
|
|
|
/** |
26
|
|
|
* @fileoverview Functions to serialize and deserialize UTF-8 strings. |
27
|
|
|
* @see https://github.com/rochars/byte-data |
28
|
|
|
* @see https://encoding.spec.whatwg.org/#the-encoding |
29
|
|
|
* @see https://encoding.spec.whatwg.org/#utf-8-encoder |
30
|
|
|
*/ |
31
|
|
|
|
32
|
|
|
/**
 * Read a string of UTF-8 characters from a byte buffer.
 * Decoding follows the WHATWG UTF-8 decoder: every ill-formed sequence
 * (stray continuation byte, overlong form, encoded surrogate, value above
 * U+10FFFF, truncated sequence) yields one 'REPLACEMENT CHARACTER' (U+FFFD),
 * and the byte that broke a sequence is decoded again as the start of the
 * next one, so no valid data is dropped.
 * @see https://encoding.spec.whatwg.org/#the-encoding
 * @see https://stackoverflow.com/a/34926911
 * @param {!Uint8Array|!Array<number>} buffer A byte buffer.
 * @param {number} [start=0] The buffer index to start reading.
 * @param {number} [end=buffer.length] The buffer index to stop reading
 *   (exclusive). Assumes the buffer length if undefined.
 * @return {string}
 */
export function unpack(buffer, start=0, end=buffer.length) {
  /** @type {string} */
  let str = '';
  /** @type {number} */
  let index = start;
  while (index < end) {
    /** @type {number} */
    const leadByte = buffer[index++];
    if (leadByte >= 0x00 && leadByte <= 0x7F) {
      str += String.fromCharCode(leadByte);
      continue;
    }
    /** @type {number} Number of continuation bytes expected. */
    let count = 0;
    // Allowed range of the FIRST continuation byte; narrowed for some
    // lead bytes to reject overlong forms, surrogates and > U+10FFFF.
    /** @type {number} */
    let lowerBoundary = 0x80;
    /** @type {number} */
    let upperBoundary = 0xBF;
    if (leadByte >= 0xC2 && leadByte <= 0xDF) {
      count = 1;
    } else if (leadByte >= 0xE0 && leadByte <= 0xEF) {
      count = 2;
      if (leadByte === 0xE0) {
        lowerBoundary = 0xA0;
      } else if (leadByte === 0xED) {
        upperBoundary = 0x9F;
      }
    } else if (leadByte >= 0xF0 && leadByte <= 0xF4) {
      count = 3;
      if (leadByte === 0xF0) {
        lowerBoundary = 0x90;
      } else if (leadByte === 0xF4) {
        upperBoundary = 0x8F;
      }
    } else {
      // 0x80-0xC1, 0xF5-0xFF, or a non-byte value: never a valid lead byte.
      str += String.fromCharCode(0xFFFD);
      continue;
    }
    // Keep only the payload bits of the lead byte (5, 4 or 3 bits).
    /** @type {number} */
    let charCode = leadByte & (0xFF >> (count + 2));
    /** @type {boolean} */
    let replace = false;
    for (let i = 0; i < count; i++) {
      /** @type {number} */
      const nextByte = buffer[index];
      if (index >= end ||
          !(nextByte >= lowerBoundary && nextByte <= upperBoundary)) {
        // Truncated or invalid: do not consume the offending byte.
        replace = true;
        break;
      }
      charCode = (charCode << 6) | (nextByte & 0x3F);
      index++;
      // The narrowed range applies to the first continuation byte only.
      lowerBoundary = 0x80;
      upperBoundary = 0xBF;
    }
    if (replace) {
      str += String.fromCharCode(0xFFFD);
    } else if (charCode <= 0xFFFF) {
      str += String.fromCharCode(charCode);
    } else {
      // Supplementary plane: emit a UTF-16 surrogate pair.
      charCode -= 0x10000;
      str += String.fromCharCode(
        ((charCode >> 10) & 0x3FF) + 0xD800,
        (charCode & 0x3FF) + 0xDC00);
    }
  }
  return str;
}
104
|
|
|
|
105
|
|
|
/**
 * Write a string of UTF-8 characters to a byte buffer.
 * Surrogate pairs are encoded as a single 4-byte sequence. A lone
 * (unpaired) surrogate cannot be represented in well-formed UTF-8 and is
 * encoded as 'REPLACEMENT CHARACTER' (U+FFFD).
 * @see https://encoding.spec.whatwg.org/#utf-8-encoder
 * @param {string} str The string to pack.
 * @param {!Uint8Array|!Array<number>} buffer The buffer to pack the string to.
 * @param {number=} index The buffer index to start writing.
 * @return {number} The next index to write in the buffer.
 * @throws {TypeError} If 'str' is not a string.
 */
export function pack(str, buffer, index=0) {
  if (typeof str !== 'string') {
    throw new TypeError('The value to pack must be a string.');
  }
  for (let i = 0, len = str.length; i < len; i++) {
    /** @type {number} */
    let codePoint = str.codePointAt(i);
    if (codePoint > 0xFFFF) {
      // The code point used two UTF-16 units; skip the low surrogate.
      i++;
    } else if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
      // Lone surrogate: not a Unicode scalar value.
      codePoint = 0xFFFD;
    }
    if (codePoint < 0x80) {
      buffer[index++] = codePoint;
      continue;
    }
    /** @type {number} Number of continuation bytes. */
    let count = 0;
    /** @type {number} Marker bits of the lead byte. */
    let offset = 0;
    if (codePoint <= 0x07FF) {
      count = 1;
      offset = 0xC0;
    } else if (codePoint <= 0xFFFF) {
      count = 2;
      offset = 0xE0;
    } else {
      count = 3;
      offset = 0xF0;
    }
    // Lead byte: marker bits plus the most significant payload bits.
    buffer[index++] = (codePoint >> (6 * count)) + offset;
    // Continuation bytes: 6 payload bits each, most significant first.
    while (count > 0) {
      count--;
      buffer[index++] = 0x80 | ((codePoint >> (6 * count)) & 0x3F);
    }
  }
  return index;
}
156
|
|
|
|